In [1]:
import numpy as np
import numpy.random as npr
import pylab as pl
In [2]:
%matplotlib inline
Overview of the algorithm (SPADE: Spanning-tree Progression Analysis of Density-normalized Events):
Computing clusters: the authors use an estimate of local density to down-sample the densest regions of the data. They then cluster the down-sampled points, assign every original point to its nearest cluster ("up-sampling"), and connect the cluster centers with a minimum spanning tree.
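In outline, a minimal sketch of the pipeline that the cells below assemble step-by-step (local_density_k, AgglomerativeClustering, and distance_graph are introduced later in the notebook):

def spade_outline(X,num_clusters=50,k=20):
    # 1. estimate the local density of every point
    scores = local_density_k(X,k)
    # 2. down-sample: keep each point with probability inversely related to its density
    accept_prob = 1 - (scores - scores.min()) / (scores.max() - scores.min())
    down_sampled = X[npr.rand(len(X)) < accept_prob]
    # 3. cluster the down-sampled data and compute cluster centers
    labels = AgglomerativeClustering(num_clusters).fit(down_sampled).labels_
    centers = np.array([down_sampled[labels==i].mean(0) for i in range(num_clusters)])
    # 4. "up-sample": assign every original point to the cluster of its nearest down-sampled neighbor
    # 5. connect the cluster centers with a minimum spanning tree (see distance_graph below)
    return centers

The full version below (the SPADE function) adds timing, plotting, and a choice of density estimators.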
In [3]:
from sklearn import neighbors
from sklearn.neighbors import KernelDensity
In [9]:
def generate_blobs(num_samples=5000,separation=8):
    # four corner centers plus one at the origin
    centers = np.array([[0,0],[1,0],[0,1],[1,1]],dtype=float)
    centers -= 0.5
    centers = np.vstack((centers,[0,0]))
    centers *= separation
    # sample from a Gaussian mixture centered on the blob centers
    kde = KernelDensity()
    kde.fit(centers)
    samples = kde.sample(num_samples)
    density = np.exp(kde.score_samples(samples))
    return samples,density
In [32]:
samples,density = generate_blobs(5000,10)
In [33]:
pl.scatter(samples[:,0],samples[:,1],
c=density,linewidth=0)
pl.axis('off')
Out[33]:
In [61]:
def local_density_k(X,k=10,metric=None):
    # estimate local density from the mean distance to the k nearest neighbors
    if metric is not None:
        bt = neighbors.BallTree(X,200,metric=metric)
        neighbor_graph = neighbors.kneighbors_graph(bt,k,'distance')
    else:
        neighbor_graph = neighbors.kneighbors_graph(X,k,'distance')
    distances = np.array(neighbor_graph.mean(1))[:,0]
    # negate so the score increases with density (the mean k-NN distance
    # shrinks in dense regions); downstream code treats high scores as dense
    return -np.log(distances+1)
In [52]:
def local_density_r(X,r=0.1,metric=None):
    # estimate local density by counting neighbors within radius r
    if metric is not None:
        bt = neighbors.BallTree(X,200,metric=metric)
        neighbor_graph = neighbors.radius_neighbors_graph(bt,r)
    else:
        neighbor_graph = neighbors.radius_neighbors_graph(X,r)
    counts = np.array(neighbor_graph.sum(1))[:,0]
    # normalize counts to [0,1]
    return ((counts - counts.min())/(counts.max() - counts.min()))
In [53]:
pl.scatter(samples[:,0],samples[:,1],
c=local_density_r(samples,1.5,'l1'),linewidth=0)
pl.axis('off')
Out[53]:
In [54]:
pl.scatter(samples[:,0],samples[:,1],
c=local_density_r(samples,1.5),linewidth=0)
pl.axis('off')
Out[54]:
In [59]:
pl.scatter(samples[:,0],samples[:,1],
c=local_density_k(samples,10),linewidth=0)
pl.axis('off')
Out[59]:
In [56]:
pl.scatter(samples[:,0],samples[:,1],
c=local_density_k(samples,5,'l1'),linewidth=0)
pl.axis('off')
Out[56]:
In [18]:
from sklearn.linear_model import LinearRegression
In [60]:
k = [1,5,10,50,100,200]
for i in range(6):
    pl.subplot(3,2,i+1)
    est_density = local_density_k(samples,k[i])
    pl.scatter(est_density,density,alpha=0.5)
    pl.title('k={0}'.format(k[i]))
    pl.axis('off')
In [20]:
def plot_estimator_error(samples,density,estimator,params,
                         param_name='r'):
    # scatter the estimated density against the true density
    # for six values of the estimator's parameter
    for i in range(6):
        pl.subplot(3,2,i+1)
        est_density = estimator(samples,params[i])
        pl.scatter(est_density,density,c='blue',alpha=0.5)
        pl.title(param_name+'={0}'.format(params[i]))
        pl.axis('off')
r = [0.01,0.1,0.25,0.5,1.0,2.0]
plot_estimator_error(samples,density,local_density_r,r,'r')
pl.figure()
k = [1,5,10,50,100,200]
plot_estimator_error(samples,density,local_density_k,k,'k')
In [21]:
ld_r_l1 = lambda X,r : local_density_r(X,r,metric='l1')
ld_k_l1 = lambda X,k : local_density_k(X,k,metric='l1')
r = [0.01,0.1,0.25,1.0,2.0,10.0]
plot_estimator_error(samples,density,ld_r_l1,r,'r')
pl.figure()
k = [1,5,10,50,100,200]
plot_estimator_error(samples,density,ld_k_l1,k,'k')
In [24]:
# let's try a higher dimensional example
centers = npr.randn(100,10)*10
kde = KernelDensity()
kde.fit(centers)
samples = kde.sample(5000)
density = np.exp(kde.score_samples(samples))
In [25]:
k = [1,5,10,50,100,200]
plot_estimator_error(samples,density,local_density_k,k,'k')
In [26]:
k = [1,5,10,20,30,50]
plot_estimator_error(samples,density,ld_k_l1,k,'k')
In [27]:
r = [2.0,3.0,5.0,6.0,7.0,10.0]
plot_estimator_error(samples,density,local_density_r,r,'r')
In [28]:
r = [3.0,5.0,6.0,7.0,10.0,12.0]
plot_estimator_error(samples,density,ld_r_l1,r,'r')
In [29]:
from sklearn.grid_search import GridSearchCV  # sklearn.model_selection in newer releases
# this snippet from documentation:
# http://scikit-learn.org/stable/auto_examples/neighbors/plot_digits_kde_sampling.html
params = {'bandwidth': np.logspace(-1, 1, 20)}
grid = GridSearchCV(KernelDensity(), params)
grid.fit(samples)
print("best bandwidth: {0}".format(grid.best_estimator_.bandwidth))
# use the best estimator to compute the kernel density estimate
kde = grid.best_estimator_
In [25]:
pred_density = np.exp(kde.score_samples(samples))
In [26]:
pl.scatter(pred_density,density)
pl.xlabel('predicted density')
pl.ylabel('actual density')
#pl.axis('off')
Out[26]:
In [27]:
pl.scatter(samples[:,0],samples[:,1],
c=-pred_density**2,linewidth=0)
Out[27]:
In [29]:
knn = neighbors.KNeighborsClassifier(1,metric='l1')
n = 100000
knn.fit(npr.randn(n,2),range(n))
Out[29]:
In [8]:
from sklearn.cluster import AgglomerativeClustering
ag = AgglomerativeClustering(10)
ag.fit(samples[:1000])
Out[8]:
In [31]:
knn = neighbors.KNeighborsClassifier(1,metric='l1')
knn.fit(samples[:1000],ag.labels_)
Out[31]:
In [32]:
knn.predict(samples)
Out[32]:
In [33]:
pl.scatter(samples[:,0],samples[:,1],
c=knn.predict(samples),
linewidth=0,alpha=0.5)
pl.axis('off')
Out[33]:
In [34]:
pl.scatter(samples[:1000,0],samples[:1000,1],
c=knn.predict(samples[:1000]),
linewidth=0,alpha=0.5)
pl.axis('off')
Out[34]:
In [35]:
len(set(knn.predict(samples)))
Out[35]:
In [9]:
# compute distance graph
import networkx as nx
def distance(x,y):
    # Euclidean distance
    return np.sqrt(np.sum((x-y)**2))
def distance_graph(centers):
    # complete graph on the centers, edges weighted by pairwise distance
    G = nx.Graph()
    for i in range(len(centers)-1):
        for j in range(i+1,len(centers)):
            G.add_edge(i,j,weight=distance(centers[i],centers[j]))
    return G
In [32]:
def SPADE(X,
          num_clusters=50,
          density_estimator='k-nearest', # or 'kde' or 'eps-neighbors'
          k=20,
          r=1.0,
          plot_overlay=False,
          native_layout=False,
          verbose=True):
    num_points = len(X)
    from time import time
    t0 = time()
    if verbose:
        print("Estimating density...")
    if density_estimator not in ['kde','k-nearest','eps-neighbors']:
        print('invalid density_estimator, defaulting to k-nearest')
        density_estimator = 'k-nearest'
    if density_estimator == 'k-nearest':
        scores = local_density_k(X,k)
    if density_estimator == 'eps-neighbors':
        scores = local_density_r(X,r)
    if density_estimator == 'kde':
        from sklearn.grid_search import GridSearchCV
        # this snippet from documentation:
        # http://scikit-learn.org/stable/auto_examples/neighbors/plot_digits_kde_sampling.html
        params = {'bandwidth': np.logspace(-1, 1, 20)}
        grid = GridSearchCV(KernelDensity(), params)
        grid.fit(X)
        print("best bandwidth: {0}".format(grid.best_estimator_.bandwidth))
        # use the best estimator to compute the kernel density estimate
        kde = grid.best_estimator_
        kde.fit(X)
        scores = kde.score_samples(X)
    if verbose:
        print("Done! Elapsed time: {0:.2f}s".format(time() - t0))
    # "down-sample": accept each point with probability inversely related
    # to its estimated density, thinning out the densest regions
    if verbose:
        print("Down-sampling...")
    accept_prob = 1 - (scores - scores.min()) / (scores.max() - scores.min())
    down_sampled_X = []
    down_sampled_i = []
    for i in range(num_points):
        if npr.rand() < accept_prob[i]:
            down_sampled_i.append(i)
            down_sampled_X.append(X[i])
    down_sampled_X = np.array(down_sampled_X)
    if verbose:
        print("Done! Reduced data from {1} to {2} points. Elapsed time: {0:.2f}s".format(time() - t0,len(X),len(down_sampled_X)))
        print("Clustering...")
    # cluster down-sampled data
    cluster_model = AgglomerativeClustering(num_clusters)
    cluster_model.fit(down_sampled_X)
    # compute cluster centers
    cluster_pred = cluster_model.labels_
    cluster_centers = []
    for i in range(num_clusters):
        cluster_centers.append(np.mean(down_sampled_X[cluster_pred==i],0))
    cluster_centers = np.array(cluster_centers)
    if verbose:
        print("Done! Elapsed time: {0:.2f}s".format(time() - t0))
        print("Up-sampling...")
    # "up-sample": assign every original point to the cluster of its
    # nearest down-sampled neighbor
    knn = neighbors.KNeighborsClassifier(1,metric='l1')
    knn.fit(down_sampled_X,cluster_pred)
    upsampled_cluster_pred = knn.predict(X)
    # compute normalized cluster occupancies
    occupancy = np.array([sum(upsampled_cluster_pred==i) for i in range(num_clusters)])
    norm_occupancy = (occupancy - occupancy.min()) / (occupancy.max() - occupancy.min())
    if verbose:
        print("Done! Elapsed time: {0:.2f}s".format(time() - t0))
        print("Computing MST...")
    # compute MST over the cluster centers
    G = distance_graph(cluster_centers)
    mst = nx.minimum_spanning_tree(G)
    if verbose:
        print("Done! Elapsed time: {0:.2f}s".format(time() - t0))
        print("Plotting SPADE tree...")
    # plot SPADE tree
    if X.shape[1] == 2 and plot_overlay:
        print("Plotting overlay on data")
        pl.scatter(X[:,0],X[:,1],c=upsampled_cluster_pred,linewidths=0,alpha=0.5)
        pl.scatter(cluster_centers[:,0],cluster_centers[:,1]);
        for e in mst.edges():
            cpts = cluster_centers[np.array(e)]
            pl.plot(cpts[:,0],cpts[:,1],c='black',linewidth=2)
        pl.axis('off')
        pl.figure()
    # set positions by force-directed layout
    # (graphviz_layout lives in nx.nx_agraph in newer networkx releases)
    pos = nx.graphviz_layout(mst)
    positions = np.zeros((len(pos),2))
    for p in pos:
        positions[p] = pos[p]
    # if the data is 2D, we can optionally use the cluster centers as positions
    if X.shape[1] == 2 and native_layout:
        positions = cluster_centers
    for e in mst.edges():
        cpts = positions[np.array(e)]
        pl.plot(cpts[:,0],cpts[:,1],c='black',linewidth=2)
    # node size encodes normalized cluster occupancy
    pl.scatter(positions[:,0],positions[:,1],
               c=cluster_centers[:,0],s=100+(200*norm_occupancy));
    pl.axis('off')
    print("Done! Total elapsed time: {0:.2f}s".format(time() - t0))
    return cluster_centers,mst
In [11]:
samples,density = generate_blobs(50000,10)
In [12]:
corners = [0,1]
In [13]:
import itertools
square = [i for i in itertools.product(corners,corners)]
square
Out[13]:
In [59]:
cube = [i for i in itertools.product(corners,corners,corners)]
cube
Out[59]:
In [14]:
def hypercube(ndim=3):
    # corners of an ndim-dimensional hypercube centered at the origin
    corners = [-1,1]
    corner_list = [corners for _ in range(ndim)]
    return [i for i in itertools.product(*corner_list)]
In [15]:
h_corners = np.array(hypercube(5))*5
h_corners.shape
Out[15]:
In [61]:
h_corners = np.array(hypercube(2))
In [62]:
branches = np.array(h_corners)
for i in range(20):
    branches = np.vstack((branches, h_corners*(i+2)))
branches = np.vstack((branches,np.zeros(branches.shape[1])))
In [65]:
import matplotlib.pyplot as pl
from mpl_toolkits.mplot3d import Axes3D
if branches.shape[1] > 2:
    fig = pl.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(branches[:,0],branches[:,1],branches[:,2])
else:
    pl.scatter(branches[:,0],branches[:,1])
pl.show()
In [57]:
kde = KernelDensity(bandwidth=0.5)
kde.fit(branches)
b_samples = kde.sample(10000)
In [66]:
if branches.shape[1] > 2:
    fig = pl.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(b_samples[:,0],b_samples[:,1],b_samples[:,2])
else:
    pl.scatter(b_samples[:,0],b_samples[:,1])
pl.show()
In [69]:
results = SPADE(b_samples)
pl.show()
In [116]:
distance(h_corners[0],h_corners[-1]),distance(h_corners[0],h_corners[1])
Out[116]:
In [39]:
sizes = [1,2,3,4,5]
dim = [2,3,4]
for (size,d) in itertools.product(sizes,dim):
    h_corners = np.array(hypercube(d))*(size/2.0)
    kde = KernelDensity()
    kde.fit(h_corners)
    h_samples = kde.sample(10000,random_state=0)
    fig = pl.figure()
    # four independent SPADE runs per setting, to see run-to-run variability
    for i in range(4):
        pl.subplot(2,2,i+1)
        results = SPADE(h_samples,verbose=False)
    fig.set_size_inches(10,10)
    pl.savefig('{0}D, L={1} hypercube.pdf'.format(d,size))
In [24]:
G = distance_graph(h_corners)
mst = nx.minimum_spanning_tree(G)
In [25]:
pos = nx.graphviz_layout(mst)
positions = np.zeros((len(pos),2))
for p in pos:
    positions[p] = pos[p]
In [50]:
results = SPADE(samples)
In [ ]:
results = SPADE(samples,density_estimator='eps-neighbors')
In [92]:
# for reproducibility
npr.seed(2)
In [93]:
# generate some synthetic data
num_centers = 10
dim = 2
centers = np.zeros((num_centers,dim))
sizes = (npr.rand(num_centers)+0.5)
num_points = 10000
# centers follow a random walk (see the to-do below about a branching process)
for i in range(1,num_centers):
    centers[i] = centers[i-1] + npr.randn(dim)*3
X = np.zeros((num_points,dim))
Y = np.zeros(num_points)
for i in range(num_points):
    c = npr.randint(num_centers)
    X[i] = npr.randn(dim)*sizes[c] + centers[c]
    Y[i] = c
In [94]:
pl.scatter(X[:,0],X[:,1],c=Y,linewidths=0,alpha=0.5)
pl.scatter(centers[:,0],centers[:,1],c=range(num_centers))
pl.axis('off')
Out[94]:
In [6]:
# to do: replace random walk with a branching process
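One possible sketch of such a branching process (the budding rule and parameters here are illustrative assumptions, not part of the original notebook): each new center buds off a uniformly chosen earlier center, so the centers form a random tree rather than a chain:

def branching_centers(num_centers=10,dim=2,step=3.0):
    # illustrative sketch: centers form a random tree instead of a random walk
    centers = np.zeros((num_centers,dim))
    parents = np.zeros(num_centers,dtype=int)
    for i in range(1,num_centers):
        parents[i] = npr.randint(i)  # bud off any earlier center
        centers[i] = centers[parents[i]] + npr.randn(dim)*step
    return centers,parents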
In [96]:
from sklearn.neighbors import KernelDensity
In [97]:
bandwidth=0.5
kde = KernelDensity(bandwidth)
In [98]:
kde.fit(X)
scores = kde.score_samples(X)
In [99]:
scores.min(),scores.max()
Out[99]:
In [100]:
sigmoid = lambda x : (1 / (1 + np.exp(-x)))
In [101]:
accept_prob = 1 - (scores - scores.min()) / (scores.max() - scores.min())
accept_prob_s = sigmoid(accept_prob)
print(accept_prob.min(),accept_prob.max())
print(accept_prob_s.min(),accept_prob_s.max())
In [102]:
pl.scatter(X[:,0],X[:,1],c=(np.log(accept_prob+1)+0.3),linewidths=0,alpha=0.5)
#pl.colorbar();
pl.scatter(centers[:,0],centers[:,1]);
pl.axis('off')
Out[102]:
In [103]:
down_sampled_X = []
down_sampled_Y = []
down_sampled_i = []
for i in range(num_points):
    if npr.rand() < accept_prob[i]:
        down_sampled_i.append(i)
        down_sampled_X.append(X[i])
        down_sampled_Y.append(Y[i])
down_sampled_X = np.array(down_sampled_X)
down_sampled_Y = np.array(down_sampled_Y)
In [104]:
len(X)
Out[104]:
In [105]:
len(down_sampled_X)
Out[105]:
In [106]:
kde_2 = KernelDensity(bandwidth)
kde_2.fit(down_sampled_X)
new_densities = kde_2.score_samples(down_sampled_X)
In [107]:
new_densities.shape
Out[107]:
In [108]:
pl.scatter(down_sampled_X[:,0],down_sampled_X[:,1],c=-new_densities.max() - new_densities,linewidths=0,alpha=0.5)
#pl.colorbar();
pl.scatter(centers[:,0],centers[:,1]);
pl.axis('off')
Out[108]:
In [109]:
from sklearn.cluster import AgglomerativeClustering
In [110]:
cluster_model = AgglomerativeClustering(10)
cluster_model.fit(down_sampled_X)
Out[110]:
In [111]:
cluster_pred = cluster_model.labels_
In [112]:
pl.scatter(down_sampled_X[:,0],down_sampled_X[:,1],c=Y[down_sampled_i],linewidths=0,alpha=0.5)
#pl.colorbar();
pl.scatter(centers[:,0],centers[:,1]);
pl.axis('off')
Out[112]:
In [113]:
cluster_centers = []
for i in range(len(set(cluster_pred))):
    cluster_centers.append(np.mean(down_sampled_X[cluster_pred==i],0))
cluster_centers = np.array(cluster_centers)
In [114]:
pl.scatter(down_sampled_X[:,0],down_sampled_X[:,1],c=cluster_pred,linewidths=0,alpha=0.5)
pl.scatter(cluster_centers[:,0],cluster_centers[:,1]);
pl.axis('off')
Out[114]:
In [26]:
cluster_true = Y[down_sampled_i]
In [27]:
from sklearn import metrics
In [28]:
# for just the downsampled points
metrics.adjusted_mutual_info_score(cluster_true,cluster_pred)
Out[28]:
In [121]:
def assign_to_nearest_landmark(points,landmarks):
    # brute-force assignment of each point to its nearest landmark
    nearest = np.zeros(len(points))
    for i,point in enumerate(points):
        distances = np.zeros(len(landmarks))
        for j,landmark in enumerate(landmarks):
            distances[j] = distance(point,landmark)
        nearest[i] = np.argmin(distances)
    return nearest
In [122]:
upsampled_cluster_pred = assign_to_nearest_landmark(X,cluster_centers)
In [123]:
knn = neighbors.KNeighborsClassifier(1,metric='l1')
knn.fit(down_sampled_X,cluster_pred)
Out[123]:
In [124]:
upsampled_cluster_pred = knn.predict(X)
In [125]:
pl.scatter(X[:,0],X[:,1],c=upsampled_cluster_pred,linewidths=0,alpha=0.5)
#pl.colorbar();
pl.scatter(cluster_centers[:,0],cluster_centers[:,1]);
pl.axis('off')
Out[125]:
In [126]:
# for all points
metrics.adjusted_mutual_info_score(Y,upsampled_cluster_pred)
In [127]:
from sklearn.cluster import KMeans
In [128]:
kmeans_model = KMeans(10)
In [129]:
kmeans_model.fit(X)
Out[129]:
In [130]:
kmeans_pred = kmeans_model.predict(X)
metrics.adjusted_mutual_info_score(Y,kmeans_pred)
In [132]:
G = distance_graph(centers)
mst = nx.minimum_spanning_tree(G)
In [133]:
mst.edges()
Out[133]:
In [134]:
pl.scatter(down_sampled_X[:,0],down_sampled_X[:,1],c=Y[down_sampled_i],linewidths=0,alpha=0.5)
pl.scatter(centers[:,0],centers[:,1]);
for e in mst.edges():
    cpts = centers[np.array(e)]
    pl.plot(cpts[:,0],cpts[:,1],c='black',linewidth=2)
pl.axis('off')
Out[134]:
In [135]:
G = distance_graph(cluster_centers)
mst = nx.minimum_spanning_tree(G)
pl.scatter(down_sampled_X[:,0],down_sampled_X[:,1],c=cluster_pred,linewidths=0,alpha=0.5)
pl.scatter(cluster_centers[:,0],cluster_centers[:,1]);
for e in mst.edges():
    cpts = cluster_centers[np.array(e)]
    pl.plot(cpts[:,0],cpts[:,1],c='black',linewidth=2)
pl.axis('off')
Out[135]:
In [136]:
occupancy = np.array([sum(upsampled_cluster_pred==i) for i in range(len(set(upsampled_cluster_pred)))])
norm_occupancy = (occupancy - occupancy.min()) / (occupancy.max() - occupancy.min())
In [137]:
norm_occupancy.max()
Out[137]:
In [143]:
G = distance_graph(cluster_centers)
mst = nx.minimum_spanning_tree(G)
for e in mst.edges():
    cpts = cluster_centers[np.array(e)]
    pl.plot(cpts[:,0],cpts[:,1],c='black',linewidth=2)
pl.scatter(down_sampled_X[:,0],down_sampled_X[:,1],c=cluster_pred,linewidths=0,alpha=0.5)
pl.scatter(cluster_centers[:,0],cluster_centers[:,1],
c=cluster_centers[:,0],
s=100+(200*norm_occupancy));
pl.axis('off')
Out[143]:
In [46]:
occupancy = np.array([sum(Y==i) for i in range(len(set(upsampled_cluster_pred)))])
norm_occupancy = (occupancy - occupancy.min()) / (occupancy.max() - occupancy.min())
In [47]:
G = distance_graph(centers)
mst = nx.minimum_spanning_tree(G)
for e in mst.edges():
    cpts = centers[np.array(e)]
    pl.plot(cpts[:,0],cpts[:,1],c='black',linewidth=2)
#pl.scatter(down_sampled_X[:,0],down_sampled_X[:,1],c=cluster_pred,linewidths=0,alpha=0.5)
pl.scatter(centers[:,0],centers[:,1],
c=centers[:,1],s=100+(200*norm_occupancy));
pl.axis('off')
Out[47]:
Re-running the pipeline with the SPADE function's default of 50 clusters instead of 10:
In [86]:
results = SPADE(X[:1000])
In [227]:
pl.scatter(samples[:,0],samples[:,1],linewidths=0,
c=kde.score_samples(samples))
pl.axis('off')
Out[227]:
In [228]:
for i in range(10):
    pl.figure()
    results = SPADE(samples)
In [72]:
cluster_pred = cluster_model.labels_
In [73]:
cluster_centers = []
for i in range(len(set(cluster_pred))):
    cluster_centers.append(np.mean(down_sampled_X[cluster_pred==i],0))
cluster_centers = np.array(cluster_centers)
In [74]:
upsampled_cluster_pred = assign_to_nearest_landmark(X,cluster_centers)
In [75]:
pl.scatter(down_sampled_X[:,0],down_sampled_X[:,1],c=cluster_pred,linewidths=0,alpha=0.5)
pl.scatter(cluster_centers[:,0],cluster_centers[:,1]);
pl.axis('off')
Out[75]:
In [76]:
pl.scatter(X[:,0],X[:,1],c=upsampled_cluster_pred,linewidths=0,alpha=0.5)
pl.scatter(cluster_centers[:,0],cluster_centers[:,1]);
pl.axis('off')
Out[76]:
In [77]:
G = distance_graph(cluster_centers)
mst = nx.minimum_spanning_tree(G)
pl.scatter(down_sampled_X[:,0],down_sampled_X[:,1],c=cluster_pred,linewidths=0,alpha=0.5)
pl.scatter(cluster_centers[:,0],cluster_centers[:,1]);
for e in mst.edges():
    cpts = cluster_centers[np.array(e)]
    pl.plot(cpts[:,0],cpts[:,1],c='black',linewidth=2)
pl.axis('off')
Out[77]:
In [78]:
occupancy = np.array([sum(Y==i) for i in range(len(set(upsampled_cluster_pred)))])
norm_occupancy = (occupancy - occupancy.min()) / (occupancy.max() - occupancy.min())
In [79]:
G = distance_graph(cluster_centers)
mst = nx.minimum_spanning_tree(G)
for e in mst.edges():
    cpts = cluster_centers[np.array(e)]
    pl.plot(cpts[:,0],cpts[:,1],c='black',linewidth=2)
#pl.scatter(down_sampled_X[:,0],down_sampled_X[:,1],c=cluster_pred,linewidths=0,alpha=0.5)
pl.scatter(cluster_centers[:,0],cluster_centers[:,1],
c=cluster_centers[:,1],s=100+(200*norm_occupancy));
pl.axis('off')
Out[79]:
In [135]:
#pos = nx.spectral_layout(mst)
pos = nx.graphviz_layout(mst)
In [136]:
positions = np.zeros((len(pos),2))
for p in pos:
    positions[p] = pos[p]
In [140]:
G = distance_graph(cluster_centers)
mst = nx.minimum_spanning_tree(G)
for e in mst.edges():
    cpts = positions[np.array(e)]
    pl.plot(cpts[:,0],cpts[:,1],c='black',linewidth=2)
#pl.scatter(down_sampled_X[:,0],down_sampled_X[:,1],c=cluster_pred,linewidths=0,alpha=0.5)
pl.scatter(positions[:,0],positions[:,1],
c=cluster_centers[:,0],
s=100+(200*norm_occupancy));
pl.axis('off')
Out[140]:
In [104]:
pl.scatter(positions[:,0],positions[:,1])
Out[104]: